By Jack Wilburn
library(readr)
library(mosaic)
library(gridExtra)
library(caret)
library(reshape)
library(car)
library(broom)
library(e1071)
library(leaps)
library(psych)
# Shared import/cleaning helpers for the four respondent groups ----

# Analysis column name (left) -> raw CSV header (right), listed in the column
# order of every cleaned data frame. Columns are looked up by header rather
# than by position, so a reordered or widened CSV cannot silently shift them.
scoreCols <- c(
  dimIPart1 = "DIMI1", dimEPart1 = "DIME1", dimSPart1 = "DIMS1",
  intIPart1 = "INTI1", intEPart1 = "INTE1", intSPart1 = "INTS1",
  difPart1 = "DIF1", intPart1 = "INT1", dimPart1 = "DIM1",
  diPart1 = "DI1", disPart1 = "DIS1",
  VQ1 = "VQ1", VQ2 = "VQ2",
  dimPercPart1 = "DIMper1", intPercPart1 = "INTper1", aiPercPart1 = "AIper1",
  dimIPart2 = "DIMI2", dimEPart2 = "DIME2", dimSPart2 = "DIMS2",
  intIPart2 = "INTI2", intEPart2 = "INTE2", intSPart2 = "INTS2",
  difPart2 = "DIF2", intPart2 = "INT2", dimPart2 = "DIM2",
  diPart2 = "DI2", disPart2 = "DIS2",
  SQ1 = "SQ1", SQ2 = "SQ2",
  dimPercPart2 = "DIMper2", intPercPart2 = "INTper2", aiPercPart2 = "AIper2",
  BQr1 = "BQr1", BQr2 = "BQr2", BQa1 = "BQa1", BQa2 = "BQa2",
  CQ1 = "CQ1", CQ2 = "CQ2",
  rho1 = "Rho1", rho2 = "Rho2", difPart1difPart2 = "DIF1divDIF2"
)

# Columns divided by 100 for the student, entrepreneur and senior-manager
# files (the executive file is not rescaled this way).
per100Cols <- c(
  "aiPercPart1", "aiPercPart2",
  "dimPercPart1", "dimPercPart2",
  "intPercPart1", "intPercPart2",
  "difPart1difPart2"
)

# Read one group's CSV and return a data frame holding the 41 score columns
# (numeric, named and ordered as in scoreCols) followed by a factor `type`.
#   path      path of the CSV file to read
#   typeLabel value stored in `type` for every row
#   bqr2Col   raw header of the BQr2 score; two of the files spell it "BQR2"
# Stops with the list of missing headers if the file lacks any needed column.
loadGroup <- function(path, typeLabel, bqr2Col = "BQr2") {
  raw <- read_csv(path)
  headers <- scoreCols
  headers[["BQr2"]] <- bqr2Col
  absent <- setdiff(headers, names(raw))
  if (length(absent) > 0) {
    stop(
      "Missing column(s) in ", path, ": ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  # lapply keeps one vector per column whatever the number of rows
  scores <- data.frame(
    lapply(raw[unname(headers)], as.numeric),
    check.names = FALSE
  )
  names(scores) <- names(headers)
  scores$type <- factor(rep(typeLabel, nrow(scores)))
  scores
}

# execs import and cleaning
execs <- loadGroup("SeniorExecs.csv", "Executive")
# NOTE(review): only this file has rho divided by 1000 and no /100 on the
# percentage columns - presumably its raw units differ from the other three
# files; confirm against the data dictionary.
execs$rho1 <- execs$rho1 / 1000
execs$rho2 <- execs$rho2 / 1000

# colStus import and cleaning (rows with any missing value are dropped)
colStus <- loadGroup("CollegeStudents.csv", "CollegeStudent", bqr2Col = "BQR2")
colStus <- na.omit(colStus)
colStus[per100Cols] <- colStus[per100Cols] / 100

# entreps import and cleaning (rows with missing values are kept)
entreps <- loadGroup("Entrepreneurs.csv", "Entrepreneur", bqr2Col = "BQR2")
entreps[per100Cols] <- entreps[per100Cols] / 100

# senMans importing and cleaning (rows with any missing value are dropped)
senMans <- loadGroup("SeniorMgrs.csv", "SeniorManager")
senMans <- na.omit(senMans)
senMans[per100Cols] <- senMans[per100Cols] / 100
# Stack the four cleaned groups into one data frame so that groups can be
# compared directly; the first 41 columns are scores, the last is `type`.
comb <- rbind(execs, colStus, entreps, senMans)
# Re-assert the column types after binding
comb[1:41] <- lapply(comb[1:41], as.numeric)
comb$type <- factor(comb$type)
# Overlaid density curves of one score for the four groups.
#   column  name of a score column present in execs, colStus, entreps, senMans
#   label   text used for the x axis
# Fill colours: executives blue, college students green, entrepreneurs red,
# senior managers black.
groupDensity <- function(column, label) {
  ggplot() +
    geom_density(aes(x = execs[[column]]), alpha = 0.5, fill = "blue") +
    geom_density(aes(x = colStus[[column]]), alpha = 0.5, fill = "green") +
    geom_density(aes(x = entreps[[column]]), alpha = 0.5, fill = "red") +
    geom_density(aes(x = senMans[[column]]), alpha = 0.5, fill = "black") +
    labs(x = label)
}

# One panel per score; descriptive names avoid reusing `c`, which would
# shadow base::c for the rest of the session.
densCQ1 <- groupDensity("CQ1", "CQ1")
densRho1 <- groupDensity("rho1", "Rho1")
densRho2 <- groupDensity("rho2", "Rho2")
densAiPart1 <- groupDensity("aiPercPart1", "AI Percentage Part 1")
densAiPart2 <- groupDensity("aiPercPart2", "AI Percentage Part 2")
grid.arrange(densCQ1, densRho1, densRho2, densAiPart1, densAiPart2, ncol = 2)
# rho1 against rho2 for every respondent, coloured by group
ggplot(comb, aes(x = rho1, y = rho2)) +
  geom_point(aes(color = type), alpha = 0.5)
# Correlation between the two rho scores over the combined data
cor(comb[["rho1"]], comb[["rho2"]])
[1] 0.2192963
# PCA of the 41 entrepreneur score columns, centred and scaled to unit
# variance. The argument is spelled `scale.` in full: prcomp() has no `scale`
# argument, so `scale = TRUE` only worked through partial argument matching.
pcaentreps <- prcomp(entreps[1:41], center = TRUE, scale. = TRUE)
# Loadings of all 41 variables on the first seven components
pcaentreps$rotation[1:41, 1:7]
PC1 PC2 PC3 PC4 PC5 PC6 PC7
dimIPart1 -0.12069640 -0.123524888 -0.280651425 0.103192230 -0.140186166 0.3601000406 -0.058300903
dimEPart1 -0.06244202 -0.131777436 -0.267692196 -0.103567015 0.249638635 -0.4236877477 0.016973018
dimSPart1 -0.12129789 -0.176498626 0.213245707 -0.042604505 -0.088937855 0.0450857224 0.116988731
intIPart1 -0.13523631 -0.123565782 -0.280311962 0.079127382 -0.085743348 0.3062872682 -0.055220989
intEPart1 -0.08515984 -0.134503686 -0.261684043 -0.071197550 0.217393685 -0.4092140176 0.041656885
intSPart1 -0.13530821 -0.165022540 0.195992944 -0.015313083 -0.102744990 0.0781306460 0.214645690
difPart1 -0.15628544 -0.215867376 -0.172407332 -0.008182514 -0.013391155 0.0398166053 0.032250141
intPart1 -0.17438950 -0.202501096 -0.157256269 0.002532267 -0.010297598 0.0319369463 0.090578039
dimPart1 -0.06277267 -0.183905574 0.377175381 -0.022544389 -0.057504248 -0.0382445349 -0.140096222
diPart1 -0.10775254 -0.189272693 0.272535905 0.023795344 -0.091789486 0.0332984473 -0.084088565
disPart1 -0.08321179 -0.168482833 0.057078014 -0.168862544 0.247375071 -0.1503491941 -0.177533817
VQ1 -0.15831495 -0.245597763 0.011909938 -0.024490278 -0.010832894 0.0027331458 -0.020443285
VQ2 -0.14314593 -0.241631634 0.146325147 -0.033916151 -0.007809680 -0.0248502082 -0.057077175
dimPercPart1 0.00808881 -0.110186938 0.434803390 -0.040170113 -0.074955568 -0.0941064939 -0.156408176
intPercPart1 -0.16125711 -0.184561676 -0.120559394 0.025736112 -0.042327465 0.0356566629 0.178990259
aiPercPart1 -0.08767945 -0.174959392 0.055158165 -0.100104020 0.305772891 -0.1422498725 -0.275044916
dimIPart2 -0.14776329 0.108864476 -0.029845360 0.057100406 -0.378755788 -0.3338790647 0.110127024
dimEPart2 -0.14984317 0.078383533 0.098475449 -0.226210961 0.245206584 0.1547649967 0.407762743
dimSPart2 -0.15784804 0.113894963 -0.084276880 -0.205835540 -0.024346189 0.1483906850 -0.414381995
intIPart2 -0.14574559 0.107317952 -0.040087884 0.105386826 -0.366006609 -0.3714411657 0.044759095
intEPart2 -0.14870418 0.081157562 0.114404191 -0.199116157 0.272347220 0.1618139879 0.392669767
intSPart2 -0.15699080 0.109900319 -0.070125675 -0.215041727 -0.021961821 0.1514463262 -0.419012123
difPart2 -0.20579000 0.142507817 -0.020173813 -0.171594370 -0.050757919 0.0104991124 0.051997341
intPart2 -0.21602834 0.142777668 0.000491846 -0.154860432 -0.048818105 -0.0160505316 -0.005328665
dimPart2 -0.15728419 0.054306716 0.073514826 0.402026971 0.198725715 0.0116993945 -0.005629773
diPart2 -0.18607730 0.070913399 0.044816612 0.327639052 0.059674961 -0.0160941564 0.011586406
disPart2 -0.18109900 0.027291162 -0.029184098 0.061448936 -0.174305545 -0.0083761239 -0.057231054
SQ1 -0.23058673 0.131511815 0.017326106 -0.013030958 0.006481007 -0.0005600189 0.010428703
SQ2 -0.22807667 0.115410176 0.035055041 0.121762523 0.054970741 -0.0043685680 -0.012105962
dimPercPart2 -0.05987428 -0.005804932 0.080996475 0.504171274 0.276227716 0.0216252630 -0.018593912
intPercPart2 -0.19381799 0.149751132 0.007383423 -0.157290033 -0.011865121 -0.0523715281 0.038751384
aiPercPart2 -0.18533531 0.031240128 0.027323392 0.102086324 -0.150696697 -0.0446281282 0.060737081
BQr1 -0.06139525 0.294337515 0.012297982 0.023297723 0.092567027 -0.0217626548 -0.016483125
BQr2 -0.03688526 0.256881502 -0.043190282 0.122198449 0.144006618 0.0028720991 -0.081220726
BQa1 -0.25182578 -0.047709376 0.017181977 -0.022982853 -0.003383479 0.0034339226 -0.004768524
BQa2 -0.24208711 -0.063070666 0.108615610 0.067582221 0.032022592 -0.0159416656 -0.044311485
CQ1 -0.17398706 0.127401411 0.045979177 -0.001851696 0.177035457 0.0696551636 -0.089196501
CQ2 -0.16992315 0.153285395 0.027979947 0.194281466 0.103424143 -0.0473821128 -0.125393284
rho1 0.16854235 0.199082824 0.171609566 -0.013317910 0.022145953 -0.0518230246 -0.027825327
rho2 0.22307773 -0.132955784 0.004136560 0.103965707 0.064312283 0.0320682178 -0.005176128
difPart1difPart2 0.05300314 -0.271927572 -0.121915454 0.167068508 0.015581164 0.0221992232 -0.036148907
# Biplot of the entrepreneur-only PCA
biplot(pcaentreps)
# Entrepreneurs and senior managers stacked for a two-group comparison
entsenman <- rbind(entreps, senMans)
# Same PCA settings as above; `scale.` is written out in full instead of
# relying on partial matching of `scale`.
pcaentsenman <- prcomp(entsenman[1:41], center = TRUE, scale. = TRUE)
# Standard deviation and variance share of each component
summary(pcaentsenman)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13
Standard deviation 3.7484 3.2900 1.84657 1.65201 1.30616 1.2132 1.17301 1.09879 0.98394 0.94910 0.72210 0.53346 0.50178
Proportion of Variance 0.3427 0.2640 0.08317 0.06656 0.04161 0.0359 0.03356 0.02945 0.02361 0.02197 0.01272 0.00694 0.00614
Cumulative Proportion 0.3427 0.6067 0.68987 0.75644 0.79805 0.8339 0.86750 0.89695 0.92057 0.94254 0.95525 0.96219 0.96834
PC14 PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24 PC25 PC26
Standard deviation 0.48097 0.4344 0.40233 0.35836 0.3201 0.30274 0.25351 0.24730 0.22923 0.20710 0.19887 0.18267 0.16760
Proportion of Variance 0.00564 0.0046 0.00395 0.00313 0.0025 0.00224 0.00157 0.00149 0.00128 0.00105 0.00096 0.00081 0.00069
Cumulative Proportion 0.97398 0.9786 0.98253 0.98566 0.9882 0.99040 0.99196 0.99346 0.99474 0.99578 0.99675 0.99756 0.99825
PC27 PC28 PC29 PC30 PC31 PC32 PC33 PC34 PC35 PC36 PC37 PC38
Standard deviation 0.14635 0.12490 0.10711 0.09949 0.08871 0.06077 0.03549 0.01775 0.01602 0.009332 7.284e-16 4.765e-16
Proportion of Variance 0.00052 0.00038 0.00028 0.00024 0.00019 0.00009 0.00003 0.00001 0.00001 0.000000 0.000e+00 0.000e+00
Cumulative Proportion 0.99877 0.99915 0.99943 0.99967 0.99986 0.99995 0.99998 0.99999 1.00000 1.000000 1.000e+00 1.000e+00
PC39 PC40 PC41
Standard deviation 4.558e-16 4.22e-16 3.57e-16
Proportion of Variance 0.000e+00 0.00e+00 0.00e+00
Cumulative Proportion 1.000e+00 1.00e+00 1.00e+00
# Component scores of each respondent, plotted on PC2 vs PC4 and coloured
# by group
pcScores <- as.data.frame(pcaentsenman$x)
ggplot(pcScores, aes(x = PC2, y = PC4)) +
  geom_point(aes(color = entsenman$type))
# Rebuild the two-group frame and reshape it to long form
# (one row per respondent/variable pair, `type` kept as the id column)
entsenman <- rbind(entreps, senMans)
meltentsenman <- melt(entsenman)
Using type as id variables
# Density of every score, one facet per variable with its own axes,
# entrepreneurs and senior managers overlaid
ggplot(meltentsenman, aes(x = value, fill = type)) +
  geom_density(alpha = 0.5) +
  facet_wrap(~ variable, scales = "free", ncol = 4)
# Cross-tabulate predicted against actual group and report accuracy statistics.
# NOTE(review): `preds` and `test` are not created anywhere in the visible
# script - presumably a train/test split and a classifier fit were dropped
# from this listing; they must be restored before this line can run.
confusionMatrix(preds, test$type)
Confusion Matrix and Statistics
Reference
Prediction Entrepreneur SeniorManager
Entrepreneur 27 11
SeniorManager 19 40
Accuracy : 0.6907
95% CI : (0.5888, 0.7807)
No Information Rate : 0.5258
P-Value [Acc > NIR] : 0.0007024
Kappa : 0.3745
Mcnemar's Test P-Value : 0.2012426
Sensitivity : 0.5870
Specificity : 0.7843
Pos Pred Value : 0.7105
Neg Pred Value : 0.6780
Prevalence : 0.4742
Detection Rate : 0.2784
Detection Prevalence : 0.3918
Balanced Accuracy : 0.6856
'Positive' Class : Entrepreneur
Feature selection (subset search with regsubsets)
# Subset search with `type` as the response and every other column of
# entsenman as a candidate predictor, requesting subsets of up to 10
# variables. No `method` is passed, so the package default search is used
# (the summary output reports it as "exhaustive").
ffs <- regsubsets(
  type ~ .,
  data = entsenman,
  nvmax = 10
)
5 linear dependencies found
Reordering variables and trying again:
# Print, for each subset size, which variables the best subset contains
summary(ffs)
Subset selection object
Call: regsubsets.formula(type ~ ., data = entsenman, nvmax = 10)
41 Variables (and intercept)
Forced in Forced out
dimIPart1 FALSE FALSE
dimEPart1 FALSE FALSE
dimSPart1 FALSE FALSE
intIPart1 FALSE FALSE
intEPart1 FALSE FALSE
intSPart1 FALSE FALSE
intPart1 FALSE FALSE
dimPart1 FALSE FALSE
diPart1 FALSE FALSE
disPart1 FALSE FALSE
VQ1 FALSE FALSE
dimPercPart1 FALSE FALSE
intPercPart1 FALSE FALSE
aiPercPart1 FALSE FALSE
dimIPart2 FALSE FALSE
dimEPart2 FALSE FALSE
dimSPart2 FALSE FALSE
intIPart2 FALSE FALSE
intEPart2 FALSE FALSE
intSPart2 FALSE FALSE
difPart2 FALSE FALSE
dimPart2 FALSE FALSE
diPart2 FALSE FALSE
disPart2 FALSE FALSE
dimPercPart2 FALSE FALSE
intPercPart2 FALSE FALSE
aiPercPart2 FALSE FALSE
BQr1 FALSE FALSE
BQr2 FALSE FALSE
BQa1 FALSE FALSE
BQa2 FALSE FALSE
CQ1 FALSE FALSE
CQ2 FALSE FALSE
rho1 FALSE FALSE
rho2 FALSE FALSE
difPart1difPart2 FALSE FALSE
difPart1 FALSE FALSE
VQ2 FALSE FALSE
intPart2 FALSE FALSE
SQ1 FALSE FALSE
SQ2 FALSE FALSE
1 subsets of each size up to 11
Selection Algorithm: exhaustive
dimIPart1 dimEPart1 dimSPart1 intIPart1 intEPart1 intSPart1 difPart1 intPart1
1 ( 1 ) " " " " " " " " " " " " " " " "
2 ( 1 ) " " " " " " " " "*" " " " " " "
3 ( 1 ) " " " " " " " " " " " " "*" " "
4 ( 1 ) " " " " " " " " " " " " "*" " "
5 ( 1 ) " " " " " " " " " " " " "*" " "
6 ( 1 ) " " " " " " " " " " " " "*" " "
7 ( 1 ) " " " " " " " " " " " " "*" " "
8 ( 1 ) " " " " " " " " " " " " "*" " "
9 ( 1 ) " " " " " " " " " " " " "*" " "
10 ( 1 ) " " " " " " " " " " " " "*" " "
11 ( 1 ) " " " " " " " " " " " " "*" " "
dimPart1 diPart1 disPart1 VQ1 VQ2 dimPercPart1 intPercPart1 aiPercPart1 dimIPart2
1 ( 1 ) " " " " " " " " " " " " " " " " " "
2 ( 1 ) " " " " " " " " " " " " " " " " " "
3 ( 1 ) " " " " " " "*" "*" " " " " " " " "
4 ( 1 ) " " " " "*" "*" "*" " " " " " " " "
5 ( 1 ) " " " " "*" " " "*" " " " " " " " "
6 ( 1 ) " " " " "*" "*" "*" " " " " " " " "
7 ( 1 ) " " " " "*" "*" "*" " " " " " " "*"
8 ( 1 ) " " " " "*" "*" "*" " " " " " " "*"
9 ( 1 ) " " " " "*" "*" "*" " " " " " " "*"
10 ( 1 ) " " " " "*" "*" "*" " " " " " " "*"
11 ( 1 ) " " " " "*" "*" "*" " " " " " " "*"
dimEPart2 dimSPart2 intIPart2 intEPart2 intSPart2 difPart2 intPart2 dimPart2
1 ( 1 ) " " " " " " " " " " " " " " " "
2 ( 1 ) " " " " " " " " " " " " " " " "
3 ( 1 ) " " " " " " " " " " " " " " " "
4 ( 1 ) " " " " " " " " " " " " " " " "
5 ( 1 ) " " " " " " " " " " " " " " " "
6 ( 1 ) " " " " " " " " " " " " " " " "
7 ( 1 ) " " " " " " " " " " " " " " " "
8 ( 1 ) " " " " " " "*" " " " " " " " "
9 ( 1 ) " " " " " " "*" " " " " " " " "
10 ( 1 ) " " "*" " " "*" " " " " " " " "
11 ( 1 ) " " "*" " " "*" " " " " " " " "
diPart2 disPart2 SQ1 SQ2 dimPercPart2 intPercPart2 aiPercPart2 BQr1 BQr2 BQa1
1 ( 1 ) " " " " " " " " " " " " " " " " " " " "
2 ( 1 ) " " " " " " " " " " " " " " " " " " " "
3 ( 1 ) " " " " " " " " " " " " " " " " " " " "
4 ( 1 ) " " " " " " " " " " " " " " " " " " " "
5 ( 1 ) " " " " "*" " " " " " " " " " " " " "*"
6 ( 1 ) " " " " " " "*" " " " " " " " " " " " "
7 ( 1 ) " " " " " " "*" " " " " " " " " " " " "
8 ( 1 ) " " " " " " "*" " " " " " " " " " " " "
9 ( 1 ) " " " " " " "*" " " " " " " " " " " " "
10 ( 1 ) " " " " " " "*" " " " " " " " " " " " "
11 ( 1 ) " " " " " " "*" " " " " " " " " " " " "
BQa2 CQ1 CQ2 rho1 rho2 difPart1difPart2
1 ( 1 ) " " "*" " " " " " " " "
2 ( 1 ) " " "*" " " " " " " " "
3 ( 1 ) " " " " " " " " " " " "
4 ( 1 ) " " " " " " " " " " " "
5 ( 1 ) " " " " " " " " " " " "
6 ( 1 ) "*" " " " " " " " " " "
7 ( 1 ) "*" " " " " " " " " " "
8 ( 1 ) "*" " " " " " " " " " "
9 ( 1 ) "*" " " " " " " "*" " "
10 ( 1 ) "*" " " " " " " "*" " "
11 ( 1 ) "*" " " " " " " "*" "*"
# RSS values stored on the regsubsets object, plotted against their position
# in ffs$rss. The x values come from the vector's own length rather than a
# hard-coded 1:42, so the plot still works if the number of candidate columns
# changes.
# NOTE(review): the first entry presumably corresponds to the intercept-only
# fit, which would make the "# of Features" axis off by one - confirm.
rssTrace <- data.frame(x = seq_along(ffs$rss), y = ffs$rss)
ggplot(rssTrace, aes(x, y)) +
  geom_point() +
  labs(x = "# of Features", y = "RSS")
Important variables from FFS are:
# Positions in ffs$xnames of the variables judged important.
# NOTE(review): these positions are hand-picked (presumably from the RSS plot)
# and depend on the variable order regsubsets settled on after reordering;
# the names they return differ from the 6-variable subset shown by
# summary(ffs) - confirm which set is intended.
importantIdx <- c(6, 9, 19, 22, 27, 39)
ffs$xnames[importantIdx]
[1] "intEPart1" "dimPart1" "intIPart2" "difPart2" "intPercPart2" "VQ2"
Variables where the 2 groups differ:
# differ
# Heading for the variables reported by the p < 0.05 loop that follows
print("Variables that differ between groups:")
[1] "Variables that differ between groups:"
# Wilcoxon rank-sum test of each score between the two groups in `type`;
# prints the variable name and p-value whenever p < 0.05.
for (scoreName in names(entsenman)[1:41]) {
  groupTest <- wilcox.test(
    reformulate("type", response = scoreName),
    data = entsenman
  )
  pValue <- groupTest$p.value
  if (pValue < 0.05) {
    print(c(scoreName, pValue))
  }
}
[1] "dimEPart1" "0.00240878216306191"
[1] "dimSPart1" "0.00998679439625931"
[1] "intEPart1" "0.000414251429586648"
[1] "intSPart1" "0.0128787501043578"
[1] "difPart1" "0.0311585017446253"
[1] "intPart1" "0.0209344920686297"
[1] "VQ1" "0.0287057955786282"
[1] "VQ2" "0.0140518727038859"
[1] "intPercPart1" "0.0204723685155504"
[1] "dimIPart2" "0.00118297965230699"
[1] "intIPart2" "0.00470298154277639"
[1] "difPart2" "0.0287260180393314"
[1] "BQr1" "0.00326264371202953"
[1] "BQr2" "0.00392589669009157"
[1] "CQ1" "1.31308755229483e-06"
[1] "CQ2" "2.95684186641073e-07"
[1] "rho1" "0.00735204490117357"
[1] "difPart1difPart2" "0.00106469953419607"
# Spacer line between the two result lists
print(" ")
[1] " "
# don't differ
# Heading for the variables reported by the complementary loop that follows
print("Variables that don't differ between groups:")
[1] "Variables that don't differ between groups:"
# Wilcoxon rank-sum test of each score between the two groups in `type`;
# prints the variable name and p-value for every variable that is NOT
# significant at the 0.05 level. The condition is `>=` so that it is the
# exact complement of the `< 0.05` test used for the "differ" list: with
# `>`, a p-value of exactly 0.05 would appear in neither list.
for (i in names(entsenman)[1:41]) {
  a <- wilcox.test(get(i) ~ type, data = entsenman)
  if (a$p.value >= 0.05) {
    print(c(i, a$p.value))
  }
}
[1] "dimIPart1" "0.484564468983294"
[1] "intIPart1" "0.352122291646371"
[1] "dimPart1" "0.168509457529553"
[1] "diPart1" "0.274930934966402"
[1] "disPart1" "0.911874291987055"
[1] "dimPercPart1" "0.726297483135801"
[1] "aiPercPart1" "0.888167633589448"
[1] "dimEPart2" "0.0556588112955014"
[1] "dimSPart2" "0.868425305958607"
[1] "intEPart2" "0.25890114674539"
[1] "intSPart2" "0.831856245784447"
[1] "intPart2" "0.101932976083815"
[1] "dimPart2" "0.648539642288582"
[1] "diPart2" "0.295206337448447"
[1] "disPart2" "0.0761111547877471"
[1] "SQ1" "0.0539467038098364"
[1] "SQ2" "0.164549556351549"
[1] "dimPercPart2" "0.547675988143944"
[1] "intPercPart2" "0.49441217619342"
[1] "aiPercPart2" "0.100255607484888"
[1] "BQa1" "0.869720709150679"
[1] "BQa2" "0.740919679141109"
[1] "rho2" "0.121063525116053"
Exploratory factor analysis (EFA) in R
# Factor analysis of the combined score columns.
# With all 41 columns the covariance matrix is singular and factanal() stops
# inside solve(), so redundant columns are removed before it is computed.
efaVars <- comb[1:41]
# Columns that are exact linear combinations of other columns
linCombos <- findLinearCombos(as.matrix(efaVars))
if (length(linCombos$remove) > 0) {
  efaVars <- efaVars[-linCombos$remove]
}
# Of the remaining columns, drop enough to break every pair with |r| > 0.95.
# NOTE(review): 0.95 is a judgement call; lower it if factanal() still
# reports a singular matrix or fails to converge.
nearDuplicates <- findCorrelation(cor(efaVars), cutoff = 0.95)
if (length(nearDuplicates) > 0) {
  efaVars <- efaVars[-nearDuplicates]
}
covMat <- cov(efaVars)
factors <- factanal(
  covmat = covMat,
  factors = 7,
  n.obs = nrow(comb),
  rotation = "varimax"
)
Error in solve.default(cv) :
system is computationally singular: reciprocal condition number = 3.20404e-19